total_env_steps= 50000000
eval_frequency= 10
seed= 0
num_envs= 4
action_repeat= 1
unroll_length= 30
batch_size= 4
num_minibatches= 1
num_update_epochs= 1
reward_scaling= 10.0
entropy_cost= 3e-4
episode_length= 1000
discounting= 0.99
learning_rate= 5e-4
max_gradient_norm= 1e9
normalize_observations= True
max_devices_per_host= None
# SAC-specific hyperparameters.
min_replay_size= 8192
max_replay_size= 1048576
grad_updates_per_step= 1.0
